***************************************************************************************************
* This replication file estimates a welfare entry counterfactual with a potential price reduction *
***************************************************************************************************
*1. Re-calibrate parameters to reflect price reduction.
*2. Estimate and store information on the status quo for comparison.
*3. Estimate the counterfactual distribution of firms under welfare maximization.
*4. Store the outcomes.
***********************
* 0 Set path to files *
***********************
clear all
capture log close 
set more off
set trace off
mata: mata set matastrict off
cd $main_directory


sca counter_offices=1 //Set this variable to 1 if counterfactual office locations in the addresses of postoffices

/*
// Option 1: Parallel computing
//The variable PBS_ARRAY_INDEX refers to a row index of a matrix, containing the district identifier in the first column, the price reduction for real-estate in the second column and the price reduction for other transactions in the third.
//Thus, the matrix contains all the combinations of district and price reduction that we look at in order to find the optimal price reduction under free entry.
global selection_counterfactual : env PBS_ARRAY_INDEX
mata: mata matuse "Estimates/counterfactual_grid_welfare", replace
mata:    st_numscalar("district",counterfactual_grid[`=$selection_counterfactual',1])
mata:    st_numscalar("reduction_P1",counterfactual_grid[`=$selection_counterfactual',2])
mata:    st_numscalar("reduction_P2",counterfactual_grid[`=$selection_counterfactual',3])
*/

//Option 2: Manual input of district identifier and price reduction levels
sca district=5 //The 5-th district is Leuven, a relatively small district which computes fairly quickly and can thus be used to test the code.
sca reduction_P1=17 //Price reduction for real estate transactions
sca reduction_P2=17 //Price reduction for other transactions

//Set predetermined values:
global select_arrond `=district'
global reduction_level_Q1 `=reduction_P1'
global reduction_level_Q2 `=reduction_P2'
global starting_value `" 1 "' //If this is set to 1, then we begin with the status quo firm distribution starting values.


**************************************************************************************
* 1. Load the parameters and re-calibrate the intercept to reflect price reductions. *
**************************************************************************************

use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear

if counter_offices==1 {
append using "Data_Belgium\Generated_data\baseline_dataset_counter_post_offices.dta"
}
qui do "Command_files\spatial_demand_Q.do"

//Set predetermined values:
sca scale=10
set seed 1234
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
mata: lambda=1 //This is the weight put on consumer surplus when calculating welfare.
mata: profit_constraint=1 //a value of 1 means that firms have to make a non-negative profit in the equilibrium (i.e. the industry is private and not subsidized).


if $select_arrond == 3 {//The third and seventh district are joined together legally.
global select_arrond_secondary `" 7 "' 
}
else{
global select_arrond_secondary `" 0 "' 
}


//If a price change is planned, make sure that the price change is reflected in the variables 
mata: change_p_Q1=1-`=$reduction_level_Q1'/100
mata: change_p_Q2=1-`=$reduction_level_Q2'/100
mata: p_Q1_new=change_p_Q1*p_Q1 //Average price
mata: p_Q2_new=change_p_Q2*p_Q2 //Average price
//The new vector will be adjusted to reflect lower prices
mata: betas_Q1_new=betas_Q1
mata: betas_Q2_new=betas_Q2 
mata:    st_numscalar("p_Q1_new",p_Q1_new)
mata:    st_numscalar("p_Q2_new",p_Q2_new)
//beta_star reflects how much a Euro of transportation costs is worth in terms of utility
mata: beta_star_Q1=betas_Q1[1]/transportation_cost //The transportation costs can be either in thousands or in other units - this does not matter, as long as the price has a similar term of measurement.
mata: beta_star_Q2=betas_Q2[1]/transportation_cost //The transportation costs can be either in thousands or in other units - this does not matter, as long as the price has a similar term of measurement.
//gamma_star reflects the intercept net of price effects
mata: gamma_star_Q1=betas_Q1[rows(betas_Q1)-1]-beta_star_Q1*p_Q1
mata: gamma_star_Q2=betas_Q2[rows(betas_Q2)-1]-beta_star_Q2*p_Q2
//The new intercept is gamma_star plus the new price effects
mata: betas_Q1_new[rows(betas_Q1_new)-1]=gamma_star_Q1+beta_star_Q1*p_Q1_new
mata: betas_Q2_new[rows(betas_Q2_new)-1]=gamma_star_Q2+beta_star_Q2*p_Q2_new

***********************************************************************
* 2. Estimate and store information on the status quo for comparison. *
***********************************************************************
//Calculate the total demand for real estate on the market
qui su Q1 if common==1
sca sumQ1=r(sum)
sca di sumQ1
//Calculate the total demand for other transactions on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2
//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation
//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q1=scale*sumQ1/sumPopulation
sca di gamma_Q1
scalar gamma_Q2=scale*sumQ2/sumPopulation
sca di gamma_Q2
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q1=gamma_Q1*population
gen market_size_Q2=gamma_Q2*population

//Define the product for which demand will be estimated.
gen firm_demand_Q1=Q1 
gen firm_demand_Q2=Q2

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)
//Keep only observations in the selected judicial district
keep if arrondjud==$select_arrond | arrondjud==$select_arrond_secondary


// Controls:
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total_Q1 newid firm_demand_Q1 market_ID_num market_size_Q1 $controls_utility
global controls_total_Q2 newid firm_demand_Q2 market_ID_num market_size_Q2 $controls_utility

//Define newid, which is just firm_IDs which go from 1 to 1152.
egen newid = group(firm_ID)
// Generate a unique observation identifier, which is used to merge the results from the counterfactuals with the initial dataset
tostring market_ID_num, gen(market_ID_string)
tostring newid, gen(newid_string)
gen join="_"
egen unique_identifier=concat(newid_string join market_ID_string)
drop join market_ID_string newid_string

// Count the number of firms in the sample
bysort firm_ID:  gen dup_firm = cond(_N==1,0,_n)
count if dup_firm<2
sca n_firm=r(N)
// Count the number of markets in the subsample
bysort market_ID: gen nvals = _n == 1 
count if nvals
sca markets_number=r(N)
di markets_number
drop nvals
* Count the number of markets with a local notary
preserve
keep if common==1 //This denotes the local market of a notary
bysort market_ID: gen nvals = _n == 1 
count if nvals
sca covered_markets_current=r(N)
sca uncovered_markets_current=markets_number-covered_markets_current
su NotPerOff
sca total_notaries_current=r(sum)
restore

//STATUS QUO ESTIMATION AND DESCRIPTIVES
* Sort the data by market ID, since this will be the first relevant grouping (to calculate notary utility within a municipality)
sort market_ID_num newid
mata:
// Select the relevant variables and read them into mata
X_Q1    = st_data(., "$controls_total_Q1")
X_Q2    = st_data(., "$controls_total_Q2")
// nx counts the number of observations
nx    = rows(X_Q1)
// Add an intercept, with length equal to the number of observations and 
X_Q1    = X_Q1,J(nx,1,1)
X_Q2    = X_Q2,J(nx,1,1)

// Create relevant market measures based on the current distribution of firms and current prices.
n_firm=st_numscalar("n_firm")
total_notaries_current=st_numscalar("total_notaries_current")
Results_ln_current_Q1=spdemand_log(X_Q1,betas_Q1)
X_Q1=sort(X_Q1,(3,1)) //sort by market and then firm_id
Results_ln_current_Q2=spdemand_log(X_Q2,betas_Q2)
X_Q2=sort(X_Q2,(3,1)) //sort by market and then firm_id
Results_current_Q1=exp(Results_ln_current_Q1[,2])
Results_current_Q2=exp(Results_ln_current_Q2[,2])
Results_current=Results_current_Q1+Results_current_Q2
output_current=round(sum(Results_current))
consumer_surplus_market_current=consumer_surplus(X_Q1,betas_Q1,transportation_cost)+consumer_surplus(X_Q2,betas_Q2,transportation_cost)
per_firm_variable_profit_current=per_firm_profit_mult_v2(X_Q1,X_Q2,betas_Q1,betas_Q2,p_Q1,p_Q2,nu,aL,phi,aM,alpha)
consumer_surplus_current=round(sum(consumer_surplus_market_current[,2]))
producer_surplus_current=round(sum(per_firm_variable_profit_current[,2])-fixed_cost*total_notaries_current)
welfare_current=round(consumer_surplus_current+producer_surplus_current)
   st_numscalar("output_current", output_current)
   st_numscalar("consumer_surplus_current", consumer_surplus_current)
   st_numscalar("producer_surplus_current", producer_surplus_current)
   st_numscalar("welfare_current", welfare_current)
//Generate a vector with the number of notaries in each office for every office-market combination
n_notaries=J(nx,1,0)
n_notaries=round(exp(X_Q1[.,6]))
//Generate a vector with the number of notaries for every office
notary_vector=J(n_firm,1,0) //Make an empty notary vector
//Calculate the actual notaries per office under the status quo
for(f=1; f<=n_firm; f++) {
notary_vector[f,1]=sum(n_notaries:*(X_Q1[.,1]:==f))/(sum((X_Q1[.,1]:==f))) //This is essentially calculating the average number of notaries across all observations for the same notary office, which is just the number of notaries for that office.
}
	end



//STATUS QUO AFTER PRICE CHANGE
//All variables are marked with "L" to denote that this is after the lowering of prices
* Sort the data by market ID, since this will be the first relevant grouping (to calculate notary utility within a municipality)
sort market_ID_num newid
mata:
// Select the relevant variables and read them into mata
X_Q1    = st_data(., "$controls_total_Q1")
X_Q2    = st_data(., "$controls_total_Q2")
// nx counts the number of observations
nx    = rows(X_Q1)
// Add an intercept, with length equal to the number of observations and 
X_Q1    = X_Q1,J(nx,1,1)
X_Q2    = X_Q2,J(nx,1,1)
//Calculate output
Results_ln_current_Q1L=spdemand_log(X_Q1,betas_Q1_new)
Results_ln_current_Q2L=spdemand_log(X_Q2,betas_Q2_new)
Results_current_Q1L=exp(Results_ln_current_Q1L[,2])
Results_current_Q2L=exp(Results_ln_current_Q2L[,2])
Results_currentL=Results_current_Q1L+Results_current_Q2L
output_currentL=round(sum(Results_currentL))
//Calculate profits per firm as well as industry profits
per_firm_variable_profit_curL=per_firm_profit_mult_v2(X_Q1,X_Q2,betas_Q1_new,betas_Q2_new,p_Q1_new,p_Q2_new,nu,aL,phi,aM,alpha) //"curL" stands for "currentL". There is a limit to the number of characters in the names of mata matrices.
producer_surplus_currentL=round(sum(per_firm_variable_profit_curL[,2])-fixed_cost*total_notaries_current)
//Calculate consumer surplus
consumer_surplus_market_currentL=consumer_surplus(X_Q1,betas_Q1_new,transportation_cost)+consumer_surplus(X_Q2,betas_Q2_new,transportation_cost)
consumer_surplus_currentL=round(sum(consumer_surplus_market_currentL[,2]))
welfare_currentL=round(lambda*consumer_surplus_currentL+producer_surplus_currentL)

//Make a vector with the starting number of notaries on the market. If starting value=1, then we start at the status quo. Otherwise, we start of zero entrants.  
n_notaries=J(nx,1,0)
if ($starting_value == 1) {
n_notaries=round(exp(X_Q1[.,6])) //The sixth column of X stores the log of the number of notaries
notary_vector=J(n_firm,1,0) //Make an empty notary vector
//Calculate the actual notaries per office right now
for(f=1; f<=n_firm; f++) {
notary_vector[f,1]=sum(n_notaries:*(X_Q1[.,1]:==f))/(sum((X_Q1[.,1]:==f)))
}
}

//Set the starting values for profits per firm, as well as welfare in the district.
firm_profit_old=per_firm_variable_profit_curL[.,2]-fixed_cost*notary_vector //This is the vector of current profits, which will be updated with each round of added notaries.
industry_profit=J(n_firm,1,0)
consumer_surplus=J(n_firm,1,0)
welfare_old=welfare_currentL //This is total welfare, which will be updated with each round of added notaries.
welfare_new=J(n_firm,1,0) //This is a vector holding the counterfactual information on welfare if entry occurs in a given location.
overall_change=100 //This variable is going to measure whether an extra loop of calculation of welfare is necessary to check for the need of additional entry or exit. We set it to a positive number (e.g. 100) at the beginning, in order to make sure that we perform at least one round of checking for potential entry and exit under the new counterfactual.
round=0
	end


*****************************************
* 3. Run the counterfactual estimation. *
*****************************************	
	
mata:
while (overall_change!=0) { //This checks whether there was a change in the distribution of notaries in the last round of additions/removals of notaries.
notary_vector_loop_start=notary_vector

//*******************************************************
//* STEP 2: REMOVE NOTARIES WHICH DON'T ENHANCE WELFARE *
//*******************************************************
notary_vector_prior_to_removal=notary_vector
extra_welfare_removal=100 //This variable denotes the welfare increase by removing the least welfare-enhancing notary. The value of 100 will be replaced with the actual welfare change during the first round of the loop.

//Beginning of loop which checks if removing a notary from a location is welfare-enhancing.
while (extra_welfare_removal>0) { //The loop goes on, as long as exit raises the welfare.
//This matrix will keep track of how the profit of the individual notary office changes with the removal of a notary
firm_profit_new=J(n_firm,1,0)
consumer_surplus=J(n_firm,1,0)
industry_profit=J(n_firm,1,0)

//Remove a notary from each alternative location and see how that would change welfare.
for(i=1; i<=n_firm; i++) { //i goes over the notaries
if (notary_vector[i,1]>0) { //This condition checks that there is at least 1 notary in the office, since otherwise removal does not make sense.
//Remove a notary from office i
notary_vector[i,1]=	notary_vector[i,1]-1 //Remove the notary to the notary vector where the i-th element shows how many notaries are active in location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==i),ln(notary_vector[i,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==i),ln(notary_vector[i,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
//Calculation of welfare when the chosen office has n-1 notaries
X_selection1_Q1=select(X_Q1, X_Q1[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q1=select(X_selection1_Q1, X_selection1_Q1[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
X_selection1_Q2=select(X_Q2, X_Q2[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q2=select(X_selection1_Q2, X_selection1_Q2[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
//Calculate firm profits after the exit at location i
per_firm_variable_profit=per_firm_profit_mult_v2(X_selection_Q1,X_selection_Q2,betas_Q1_new,betas_Q2_new,p_Q1_new,p_Q2_new,nu,aL,phi,aM,alpha) //Calculate per firm variable profits
//Make an zero vector of per-firm profits (we do this in order to get the ordering of the profit data right)
per_firm_profit=range(1,rows(notary_vector),1),J(rows(notary_vector),1,0)
//Fill in the profits of active firms 
for (f=1; f<=rows(per_firm_variable_profit); f++){
id_of_firm=per_firm_variable_profit[f,1]
per_firm_profit[id_of_firm,2]=per_firm_variable_profit[f,2]-fixed_cost*notary_vector[id_of_firm,1] //Subtract the fixed costs to obtain total firm profits
} 
industry_profit[i,1]=sum(per_firm_profit[,2])
//Store the information on how exit affects the profits of the specific notary office where the exit happened
firm_profit_new[i,1]=per_firm_profit[i,2]

//Calculate consumer surplus
consumer_surplus_per_market=consumer_surplus(X_Q1,betas_Q1_new,transportation_cost)+consumer_surplus(X_Q2,betas_Q2_new,transportation_cost)
consumer_surplus[i,1]=sum(consumer_surplus_per_market[,2]) //Calculate total surplus when a notary is added to location i.

//Add back the removed notary to get back to the original data.
notary_vector[i,1]=	notary_vector[i,1]+1 //Add the notary to the notary vector where the i-th element shows how many notaries are active in location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==i),ln(notary_vector[i,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==i),ln(notary_vector[i,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
} //End of loop that checks that there is at least 1 notary in the location to begin with (since otherwise removing does not make sense).
} //End of loop which goes over all observations.

//Calculate the new welfare due to the removal in each office
welfare_new=industry_profit+lambda*consumer_surplus
//Calculate the marginal effect of the removal, both for welfare and for the profits of the individual notary
welfare_removal=welfare_new-J(rows(welfare_new),1,welfare_old) //Calculate the change in welfare due to removing one notary from each location.
profit_removal=firm_profit_new-firm_profit_old

//// CHECKING IF REMOVAL IS WELFARE ENHANCING AND REMOVING NOTARIES IF NECESSARY
//If the profit constraint is assumed to hold, notaries will exit either because they are welfare-decreasing or because they are profit-decreasing.
if (profit_constraint==1) {
for(i=1; i<=n_firm; i++) { //i goes over the notaries
//Replace the welfare from removal with a high value if the firm is making negative profits. In that case, a notary will be removed from that office, regardless of whether it is good for welfare or not.
//Note that only notaries with a positive contribution through removal will exit. This means that profitability will only matter if profits are negative when the notary is active.
if (firm_profit_old[i,1]<0){
welfare_removal[i,1]=-firm_profit_old[i,1]*9000 //This substitution means that the removal of unprofitable notaries will take place before the removal of welfare-decreasing notaries.
//Note that we are calculating the surplus in thousands, so the multiplication by 9000 indicates that the removal of a notary making a loss of 1000 Euro is assumed to contribute to welfare gains of 9 million Euro. This ensures that it is removed first, allowing us to keep the profit constraint. If your analysis has a different scale, you may need to raise this number.
}
}
}	

//Find the location at which entry is least welfare-enhancing and denote it with max_ID (since exiting there gives the maximum increase in welfare)
maxindex(welfare_removal, 1, max_ID=., junk=.) //This returns the ID of the least welfare enhancing entrant as max_ID (since exiting there gives the maximum increase in welfare).
//Calculate the extra welfare which is generated through exit in a given location
max_ID=max_ID[1,1]
extra_welfare_removal=welfare_removal[max_ID,1]

//Remove one notary to the max_ID location, as long as it increases welfare.
if (extra_welfare_removal>0) { //Start of condition checking that welfare gains from exit are positive and removing a notary from the most welfare-decreasing location.
notary_vector[max_ID,1]=notary_vector[max_ID,1]-1 //Remove the notary from the notary vector where the i-th element shows how many notaries are active in location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==max_ID),ln(notary_vector[max_ID,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==max_ID),ln(notary_vector[max_ID,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
//Store the expected welfare if exit happens at max_ID as the new current welfare. Note that this is not affected by the replacement we made within the profit constraint, since that only changes the marginal contribution, not the total welfare.
welfare_old=welfare_new[max_ID,1] 
// Calculate the per-firm profits after the exit of the notary.
//Calculation of profits for all offices when the chosen office has n-1 notaries
X_selection1_Q1=select(X_Q1, X_Q1[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q1=select(X_selection1_Q1, X_selection1_Q1[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
X_selection1_Q2=select(X_Q2, X_Q2[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q2=select(X_selection1_Q2, X_selection1_Q2[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
//Calculate variable profits per firm
per_firm_variable_profit=per_firm_profit_mult_v2(X_selection_Q1,X_selection_Q2,betas_Q1_new,betas_Q2_new,p_Q1_new,p_Q2_new,nu,aL,phi,aM,alpha) //Calculate per-firm variable profits
//Make a matrix in which  to store the profits of the firms. The first column contains the id of the firm, while the second column contains the profits (initially set to zero).
per_firm_profit=range(1,rows(notary_vector),1),J(rows(notary_vector),1,0)
//Fill in the profits of active firms 
for (f=1; f<=rows(per_firm_variable_profit); f++){
id_of_firm=per_firm_variable_profit[f,1]
per_firm_profit[id_of_firm,2]=per_firm_variable_profit[f,2]-fixed_cost*notary_vector[id_of_firm,1] //Subtract the fixed costs to obtain total firm profits
} //End of loop filling in available profits
//Store firm profits as firm_profit_old. 
firm_profit_old=per_firm_profit[.,2]
} //End of condition checking that the welfare change from exit is positive and removing a notary from the least welfare-enhancing location.
} //End of loop checking if there is any need for exit.

//**********************************************
//* STEP 3: ADD NOTARIES WHICH ENHANCE WELFARE *
//**********************************************
//Checking if adding notaries is welfare enhancing:
extra_welfare=100 //This variable denotes the welfare increase of the last entrant. The value of 100 will be replaced with actual welfare increase during the first round of the loop.
while (extra_welfare>0) { //The loop goes on, as long as additional entry raises welfare.
//This matrix will keep track of how the profit of the individual notary office changes with the addition of a notary
firm_profit_new=J(n_firm,1,0)
consumer_surplus=J(n_firm,1,0)
industry_profit=J(n_firm,1,0)		
//Add a notary to each alternative location and see how that would change welfare.
for(i=1; i<=n_firm; i++) { //i goes over the notaries
//Add a notary to office i
notary_vector[i,1]=	notary_vector[i,1]+1 //Add the notary to the notary vector where the i-th element shows how many notaries are active in location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==i),ln(notary_vector[i,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==i),ln(notary_vector[i,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
//Calculation of welfare when the chosen office has n+1 notaries
X_selection1_Q1=select(X_Q1, X_Q1[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q1=select(X_selection1_Q1, X_selection1_Q1[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
X_selection1_Q2=select(X_Q2, X_Q2[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q2=select(X_selection1_Q2, X_selection1_Q2[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
//Calculate firm profits
per_firm_variable_profit=per_firm_profit_mult_v2(X_selection_Q1,X_selection_Q2,betas_Q1_new,betas_Q2_new,p_Q1_new,p_Q2_new,nu,aL,phi,aM,alpha) //Calculate per firm variable profits
//Make a zero vector of per-firm profits (we do this in order to get the ordering of the profit data right)
per_firm_profit=range(1,rows(notary_vector),1),J(rows(notary_vector),1,0)
//Fill in the profits of active firms 
for (f=1; f<=rows(per_firm_variable_profit); f++){
id_of_firm=per_firm_variable_profit[f,1]
per_firm_profit[id_of_firm,2]=per_firm_variable_profit[f,2]-fixed_cost*notary_vector[id_of_firm,1] //Subtract the fixed costs to obtain total firm profits
} 
industry_profit[i,1]=sum(per_firm_profit[,2])
//Store the information on how entry affects the profits of the specific notary office where the entry happened
firm_profit_new[i,1]=per_firm_profit[i,2]
//Calculate consumer surplus
consumer_surplus_per_market=consumer_surplus(X_Q1,betas_Q1_new,transportation_cost)+consumer_surplus(X_Q2,betas_Q2_new,transportation_cost)
consumer_surplus[i,1]=sum(consumer_surplus_per_market[,2]) //Calculate total surplus when a notary is added to location i.
//Remove the added notary from the location to get back to the original data.
notary_vector[i,1]=	notary_vector[i,1]-1 //Remove the notary from the notary vector where the i-th element shows how many notaries are active in location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==i),ln(notary_vector[i,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==i),ln(notary_vector[i,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
} //End of loop which goes over all offices, adds 1 notary and calculates expected profits and consumer surplus, and then removes it again.

//Calculate the new welfare due to the addition in each office
welfare_new=industry_profit+lambda*consumer_surplus //Calculate the expected welfare. This is a vector, in which element i stands for the welfare if a notary is added in location i.
//Calculate the marginal effect of the addition, both for welfare and for the profits of the individual notary
welfare_entry=welfare_new-J(rows(welfare_new),1,welfare_old) //Calculate the change in welfare due to adding one more notary to each location.
profit_entry=firm_profit_new-firm_profit_old

//Checking if addition is welfare-enhancing and adding notaries if necessary
//If the profit constraint is assumed to hold, notaries will not enter if they are profit decreasing.
if (profit_constraint==1) {
for(i=1; i<=n_firm; i++) { //i goes over the notaries
//Replace the welfare from addition with the profit after addition, if the profit after addition is below zero. 
//This effectively takes out notary offices where entry is not profitable from the set of notaries considered for addition.
//Profitability only plays a role if it is negative.
if (firm_profit_new[i,1]<0){
welfare_entry[i,1]=firm_profit_new[i,1]
} //End of loop replacing the welfare contribution of unprofitable locations with profits (to make sure they are not selected for addition).
} //End of loop which goes over all locations
} //End of if-condition checking the profit constraint
///Find the most welfare-enhancing location and add a notary there.
//Find the location at which entry is most welfare-enhancing and denote it with max_ID
maxindex(welfare_entry, 1, max_ID=., junk=.) //This returns the ID of the most welfare-enhancing entrant as max_ID.
max_ID=max_ID[1,1]
//Calculate the extra welfare that the most welfare-enhancing entrant makes
extra_welfare=welfare_entry[max_ID,1]
//Add one more notary to the max_ID location, as long as it increases welfare.
if (extra_welfare>0) { //Start of condition checking that welfare gains from additional entry are positive and adding a notary to the most welfare-enhancing location.
notary_vector[max_ID,1]=notary_vector[max_ID,1]+1 //Add the notary to the notary vector where the i-th element shows how many notaries are active in location i.
X_Q1[.,6]=mm_cond((X_Q1[.,1]:==max_ID),ln(notary_vector[max_ID,1]),X_Q1[.,6]) //Change the control variable matrix to reflect this change.
X_Q2[.,6]=mm_cond((X_Q2[.,1]:==max_ID),ln(notary_vector[max_ID,1]),X_Q2[.,6]) //Change the control variable matrix to reflect this change.
//Store the expected welfare if entry happens at max_ID as the new current welfare.
welfare_old=welfare_new[max_ID,1] 
// Calculate the per-firm profits after the addition of the notary.
//Calculation of profits for all offices when the chosen office has n+1 notaries
X_selection1_Q1=select(X_Q1, X_Q1[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q1=select(X_selection1_Q1, X_selection1_Q1[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
X_selection1_Q2=select(X_Q2, X_Q2[.,6]:>=0) //Select those values of X where the log of notaries is >=0, i.e. only use data on active notary offices.
X_selection_Q2=select(X_selection1_Q2, X_selection1_Q2[.,6]:!=.) //Select those values of X where the log of notaries is not missing, i.e. only use data on active notary offices.
//Calculate variable profits per firm
per_firm_variable_profit=per_firm_profit_mult_v2(X_selection_Q1,X_selection_Q2,betas_Q1_new,betas_Q2_new,p_Q1_new,p_Q2_new,nu,aL,phi,aM,alpha) //Calculate per firm variable profits
//Make an matrix in which  to store the profits of the firms. The first column contains the id of the firm, while the second column contains the profits (initially set to zero).
per_firm_profit=range(1,rows(notary_vector),1),J(rows(notary_vector),1,0)
//Fill in the profits of active firms 
for (f=1; f<=rows(per_firm_variable_profit); f++){
id_of_firm=per_firm_variable_profit[f,1]
per_firm_profit[id_of_firm,2]=per_firm_variable_profit[f,2]-fixed_cost*notary_vector[id_of_firm,1] //Subtract the fixed costs to obtain total firm profits
} //End of loop filling in available profits
//Store firm profits as firm_profit_old. 
firm_profit_old=per_firm_profit[.,2]
} //End of condition checking that the welfare increase from additional entry is positive and adding a notary to the most welfare-enhancing location.
} // End of loop checking if adding notaries is welfare-enhancing
//*****************************
//* STEP 4: CHECK CONVERGENCE *
//*****************************
//If there was change in the number of notaries, we need to check again if exit or entry is welfare-enhacing conditional on the new distribution of firms.
notary_vector_change=notary_vector-notary_vector_loop_start
overall_change=sum(notary_vector_change:!=0)
}
newid=X_Q1[.,1]
market_ID_num=X_Q1[.,3]
n_notaries=exp(X_Q1[.,6])
join=J(rows(X_Q1),1,"_")
unique_identifier=strofreal(newid)+join+strofreal(market_ID_num)
	end

************************************************
* 4. Store the outcomes of the counterfactual. *
************************************************
getmata n_notaries, id(unique_identifier)
mata: notary_vector
mata: zero_notaries=sum((notary_vector[.,1]:==0))
mata: zero_notaries
mata: total_notaries=sum(notary_vector)
mata: total_notaries
mata: offices_number=sum((notary_vector[.,1]:>0))
//sca di n_notary_total
su NotPerOff
replace NotPerOff=n_notaries
su NotPerOff
su n_notaries
//Keep only active notary offices
drop if NotPerOff==0
drop if NotPerOff==.
replace lnNotPerOff=ln(NotPerOff)
table lnNotPerOff

//Calculate consumer surplus and producer surplus
sort market_ID newid
mata:
// Select the relevant variables and read them into mata
X_Q1    = st_data(., "$controls_total_Q1")
X_Q2    = st_data(., "$controls_total_Q2")
// nx counts the number of observations
nx    = rows(X_Q1)
// Add an intercept, with length equal to the number of observations and 
X_Q1    = X_Q1,J(nx,1,1)
X_Q2    = X_Q2,J(nx,1,1)

// Calculate per firm profit as a matrix 
Results_ln_Q2=spdemand_log(X_Q2,betas_Q2_new)
Results_ln_Q1=spdemand_log(X_Q1,betas_Q1_new)
Results_Q1=exp(Results_ln_Q1[,2])
Results_Q2=exp(Results_ln_Q2[,2])
Results=Results_Q1+Results_Q2
mean(Results_Q1)
mean(Results_Q2)
output=round(sum(Results))

consumer_surplus_per_market=consumer_surplus(X_Q1,betas_Q1_new,transportation_cost)+consumer_surplus(X_Q2,betas_Q2_new,transportation_cost)
per_firm_variable_profit=per_firm_profit_mult_v2(X_Q1,X_Q2,betas_Q1_new,betas_Q2_new,p_Q1_new,p_Q2_new,nu,aL,phi,aM,alpha)
consumer_surplus=round(sum(consumer_surplus_per_market[,2]))
producer_surplus=round(sum(per_firm_variable_profit[,2])-fixed_cost*total_notaries)
welfare=round(consumer_surplus+producer_surplus)
st_numscalar("output", output)
st_numscalar("consumer_surplus", consumer_surplus)
st_numscalar("producer_surplus", producer_surplus)
st_numscalar("welfare", welfare)
unprofitable_firms=sum((firm_profit_old:<0))
st_numscalar("unprofitable_firms", unprofitable_firms)
end

keep if common==1 //This denotes the local market of a notary
gen test_of_change=NotPerOff-n_notaries
su test_of_change
su NotPerOff
sca total_notaries=r(sum)
sca offices_number=r(N)
bysort market_ID: gen nvals = _n == 1 
count if nvals
sca covered_markets=r(N)
sca uncovered_markets=markets_number-covered_markets
sca change_notaries=total_notaries-total_notaries_current
sca change_offices=offices_number-n_firm
sca change_uncovered_markets=uncovered_markets-uncovered_markets_current
sca change_output=output-output_current
sca change_consumer_surplus=consumer_surplus-consumer_surplus_current
sca change_producer_surplus=producer_surplus-producer_surplus_current
sca change_welfare=welfare-welfare_current
//Table
di total_notaries " & " offices_number " & " uncovered_markets " &  " output " & " change_consumer_surplus " & " change_producer_surplus " & " change_welfare " & " unprofitable_firms
matrix welfare_max=($select_arrond , total_notaries,offices_number,uncovered_markets,output,change_consumer_surplus,change_producer_surplus,change_welfare,unprofitable_firms)

if counter_offices==1 {
mata: welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'_0=st_matrix("welfare_max")
//Store the results as a mata table	
mata: mata matsave "Estimates/Entry_counterfactual/welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'_0" welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'_0, replace
//Store the results as a dta file
rename NotPerOff NotPerOff_welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_0
keep bvdid_num NotPerOff_welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_0
save "Estimates/Entry_counterfactual/welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'_0.dta", replace
}
else{
mata: welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'=st_matrix("welfare_max")
//Store the results as a mata table	
mata: mata matsave "Estimates/Entry_counterfactual/welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'" welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond', replace
//Store the results as a dta file
rename NotPerOff NotPerOff_welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'
keep bvdid_num NotPerOff_welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'
save "Estimates/Entry_counterfactual/welfare_`=$reduction_level_Q1'_`=$reduction_level_Q2'_`=$select_arrond'.dta", replace
}
